# Packages ----
library(tidyverse)
── Attaching packages ──────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.1.0       ✔ purrr   0.3.0  
✔ tibble  2.0.1       ✔ dplyr   0.8.0.1
✔ tidyr   0.8.2       ✔ stringr 1.4.0  
✔ readr   1.3.1       ✔ forcats 0.4.0  
── Conflicts ─────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
# Read the incarceration trends CSV into `incar`.
incar <- read_csv(file = "../data/incarceration_trends.csv")
Parsed with column specification:
cols(
  .default = col_double(),
  state = col_character(),
  county_name = col_character(),
  urbanicity = col_character(),
  region = col_character(),
  division = col_character()
)
See spec(...) for full column specifications.

Let’s Explore

# Print the full column specification that was used when parsing `incar`.
spec(incar)
cols(
  yfips = col_double(),
  year = col_double(),
  fips = col_double(),
  state = col_character(),
  county_name = col_character(),
  total_pop = col_double(),
  total_pop_15to64 = col_double(),
  female_pop_15to64 = col_double(),
  male_pop_15to64 = col_double(),
  asian_pop_15to64 = col_double(),
  black_pop_15to64 = col_double(),
  latino_pop_15to64 = col_double(),
  native_pop_15to64 = col_double(),
  other_pop_15to64 = col_double(),
  white_pop_15to64 = col_double(),
  urbanicity = col_character(),
  region = col_character(),
  division = col_character(),
  commuting_zone = col_double(),
  metro_area = col_double(),
  land_area = col_double(),
  total_jail_adm = col_double(),
  total_jail_adm_dcrp = col_double(),
  female_jail_adm_dcrp = col_double(),
  male_jail_adm_dcrp = col_double(),
  total_jail_pop = col_double(),
  female_jail_pop = col_double(),
  male_jail_pop = col_double(),
  asian_jail_pop = col_double(),
  black_jail_pop = col_double(),
  latino_jail_pop = col_double(),
  native_jail_pop = col_double(),
  white_jail_pop = col_double(),
  total_jail_pretrial = col_double(),
  female_jail_pretrial = col_double(),
  male_jail_pretrial = col_double(),
  jail_from_state_prison = col_double(),
  jail_from_other_state_prison = col_double(),
  jail_from_state_jail = col_double(),
  jail_from_other_state_jail = col_double(),
  jail_from_fed = col_double(),
  jail_from_ice = col_double(),
  total_jail_pop_dcrp = col_double(),
  female_jail_pop_dcrp = col_double(),
  male_jail_pop_dcrp = col_double(),
  total_prison_pop = col_double(),
  female_prison_pop = col_double(),
  male_prison_pop = col_double(),
  asian_prison_pop = col_double(),
  black_prison_pop = col_double(),
  latino_prison_pop = col_double(),
  native_prison_pop = col_double(),
  other_prison_pop = col_double(),
  white_prison_pop = col_double(),
  total_prison_adm = col_double(),
  female_prison_adm = col_double(),
  male_prison_adm = col_double(),
  asian_prison_adm = col_double(),
  black_prison_adm = col_double(),
  latino_prison_adm = col_double(),
  native_prison_adm = col_double(),
  other_prison_adm = col_double(),
  white_prison_adm = col_double(),
  num_facilites = col_double(),
  num_employees = col_double(),
  confined_pop = col_double(),
  capacity = col_double(),
  ucr_population = col_double(),
  index_crime = col_double(),
  violent_crime = col_double(),
  property_crime = col_double(),
  murder_crime = col_double(),
  rape_crime = col_double(),
  robbery_crime = col_double(),
  agr_assault_crime = col_double(),
  burglary_crime = col_double(),
  larceny_crime = col_double(),
  mv_theft_crime = col_double(),
  arson_crime = col_double()
)
# Number of rows and columns in `incar`.
dim(incar)
[1] 147533     79

What missing data do I have?

Is missingness related to year?

# Minimum, median and maximum total prison population within each
# urbanicity class, ignoring missing values.
summarise(
  group_by(incar, urbanicity),
  min = min(total_prison_pop, na.rm = TRUE),
  median = median(total_prison_pop, na.rm = TRUE),
  max = max(total_prison_pop, na.rm = TRUE)
)

Which counties have the highest prison population per capita?

# Yearly prison population per capita (2000 onwards) for the Texas counties
# listed in `top_texas_counties`, drawn as one line panel per county.
incar %>% 
  filter(year >= 2000, state == "TX") %>%
  # Name the join key explicitly instead of relying on a natural join over
  # whatever columns the two tables happen to share; this also silences the
  # "Joining, by = ..." message.
  inner_join(top_texas_counties, by = "county_name") %>% 
  select(year, state, county_name, total_prison_pop, total_pop) %>% 
  mutate(prison_pop_per_capita = total_prison_pop / total_pop) %>% 
  ggplot(aes(x = year, y = prison_pop_per_capita, colour = county_name)) +
  # Colour only distinguishes panels, so the legend is suppressed.
  geom_line(show.legend = FALSE) +
  # Show the ratio as a percentage on the y axis.
  scale_y_continuous(labels = scales::percent) +
  # Each panel gets its own axis ranges.
  facet_wrap(~county_name, scales = "free") +
  theme_bw()

Which counties have the biggest change?

# For each county (identified by name + state), measure how much prison
# population per capita moved from 2000 onwards: the variance of the yearly
# ratio and its max - min spread, listed with the largest spread first.
incar %>% 
  filter(year >= 2000) %>% 
  mutate(prison_pop_per_capita = total_prison_pop / total_pop) %>% 
  # Drop non-finite ratios (missing counts, or division by a zero population)
  # before summarising: a county with no usable values would otherwise make
  # max()/min() warn and report a spread of -Inf.
  filter(is.finite(prison_pop_per_capita)) %>% 
  group_by(county_name, state) %>% 
  summarise(
    variance = var(prison_pop_per_capita),
    spread = max(prison_pop_per_capita) - min(prison_pop_per_capita)
  ) %>% 
  # summarise() leaves the result grouped by county_name; remove that
  # grouping so the table is flat for anything that follows.
  ungroup() %>% 
  # Put the counties with the biggest change at the top.
  arrange(desc(spread))
LS0tCnRpdGxlOiAiSW5jYXJjZXJhdGlvbiBFREEiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCmBgYHtyIHNldHVwfQojIFBhY2thZ2VzIC0tLS0KbGlicmFyeSh0aWR5dmVyc2UpCmBgYAoKYGBge3IgZGF0YX0KaW5jYXIgPC0gcmVhZF9jc3YoImh0dHBzOi8vcmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbS9yZm9yZGF0YXNjaWVuY2UvdGlkeXR1ZXNkYXkvbWFzdGVyL2RhdGEvMjAxOS8yMDE5LTAxLTIyL2luY2FyY2VyYXRpb25fdHJlbmRzLmNzdiIpCmBgYAoKIyBMZXQncyBFeHBsb3JlCmBgYHtyfQpzcGVjKGluY2FyKQpgYGAKCgpgYGB7cn0KZGltKGluY2FyKQpgYGAKCmBgYHtyfQppbmNhciAlPiUgCiAgY291bnQoc3RhdGUsCiAgICAgICAgY291bnR5X25hbWUsCiAgICAgICAgeWVhciwgc29ydCA9IFRSVUUpICU+JSAKICBjb3VudChuKQpgYGAKCmBgYHtyfQppbmNhciAlPiUgCiAgY291bnQodXJiYW5pY2l0eSkgJT4lIAogIG11dGF0ZShwID0gbiAvIHN1bShuKSkgJT4lIAogIGdncGxvdChhZXMoeCA9IHVyYmFuaWNpdHksIHkgPSBwKSkgKwogIGdlb21fY29sKCkKYGBgCgpgYGB7cn0KaW5jYXIgJT4lIAogIGdyb3VwX2J5KHVyYmFuaWNpdHkpICU+JSAKICBzdW1tYXJpc2UoCiAgICBtaW4gPSBtaW4odG90YWxfcHJpc29uX3BvcCksCiAgICBtZWRpYW4gPSBtZWRpYW4odG90YWxfcHJpc29uX3BvcCksCiAgICBtYXggPSBtYXgodG90YWxfcHJpc29uX3BvcCkKICApCmBgYAoKV2hhdCBtaXNzaW5nIGRhdGEgZG8gSSBoYXZlPwpgYGB7cn0KbWFwX2RmKGluY2FyLCB+bWVhbihpcy5uYSguKSkpICU+JSAKICBnYXRoZXIodmFsdWUgPSAibWlzc2luZ25lc3MiKSAlPiUgCiAgYXJyYW5nZShkZXNjKG1pc3NpbmduZXNzKSkKYGBgCgpJcyBtaXNzaW5nbmVzcyByZWxhdGVkIHRvIHllYXI/CmBgYHtyfQppbmNhciAlPiUgCiAgZ3JvdXBfYnkoeWVhcikgJT4lIAogIHN1bW1hcmlzZV9hbGwofm1lYW4oaXMubmEoLikpKSAlPiUgCiAgZ2F0aGVyKGtleSA9IGNvbHVtbiwgdmFsdWUgPSBtaXNzaW5nbmVzcywgLXllYXIpICU+JSAKICBncm91cF9ieSh5ZWFyKSAlPiUgCiAgc3VtbWFyaXNlKG1pc3NpbmduZXNzID0gbWVhbihtaXNzaW5nbmVzcykpICU+JSAKICBnZ3Bsb3QoYWVzKHggPSB5ZWFyLCB5ID0gbWlzc2luZ25lc3MpKSArCiAgZ2VvbV9jb2woKSArCiAgc2NhbGVfeV9jb250aW51b3VzKGxhYmVscyA9IHNjYWxlczo6cGVyY2VudCkgKwogIHRoZW1lX2J3KCkKYGBgCgpgYGB7cn0KaW5jYXIgJT4lIAogIGdyb3VwX2J5KHVyYmFuaWNpdHkpICU+JSAKICBzdW1tYXJpc2UoCiAgICBtaW4gPSBtaW4odG90YWxfcHJpc29uX3BvcCwgbmEucm0gPSBUUlVFKSwKICAgIG1lZGlhbiA9IG1lZGlhbih0b3RhbF9wcmlzb25fcG9wLCBuYS5ybSA9IFRSVUUpLAogICAgbWF4ID0gbWF4KHRvdGFsX3ByaXNvbl9wb3AsIG5hLnJtID0gVFJVRSkKICApCmBgYAoKYGBge3J9CmluY2FyICU+JSAKICBnZ3Bsb3QoYWVzKHggPSB1cmJhbmljaXR5LCB5
ID0gdG90YWxfcHJpc29uX3BvcCkpICsKICBnZW9tX2JveHBsb3QoKQpgYGAKCldoaWNoIGNvdW50aWVzIGhhdmUgdGhlIGhpZ2hlc3QgcHJpc29uIHBvcHVsYXRpb24gcGVyIGNhcGl0YQpgYGB7cn0KdG9wX3RleGFzX2NvdW50aWVzIDwtIGluY2FyICU+JSAKICBmaWx0ZXIoeWVhciA+PSAyMDAwLCBzdGF0ZSA9PSAiVFgiKSAlPiUgCiAgZ3JvdXBfYnkoY291bnR5X25hbWUpICU+JSAKICBzdW1tYXJpc2UobWF4X3BvcCA9IG1heCh0b3RhbF9wb3AsIG5hLnJtID0gVFJVRSkpICU+JSAKICB0b3BfbigxMCwgbWF4X3BvcCkgJT4lIAogIHNlbGVjdChjb3VudHlfbmFtZSkKYGBgCgoKYGBge3J9CmluY2FyICU+JSAKICBmaWx0ZXIoeWVhciA+PSAyMDAwLCBzdGF0ZSA9PSAiVFgiKSAlPiUKICBpbm5lcl9qb2luKHRvcF90ZXhhc19jb3VudGllcykgJT4lIAogIHNlbGVjdCh5ZWFyLCBzdGF0ZSwgY291bnR5X25hbWUsIHRvdGFsX3ByaXNvbl9wb3AsIHRvdGFsX3BvcCkgJT4lIAogIG11dGF0ZShwcmlzb25fcG9wX3Blcl9jYXBpdGEgPSB0b3RhbF9wcmlzb25fcG9wIC8gdG90YWxfcG9wKSAlPiUgCiAgZ2dwbG90KGFlcyh4ID0geWVhciwgeSA9IHByaXNvbl9wb3BfcGVyX2NhcGl0YSwgY29sID0gY291bnR5X25hbWUpKSArCiAgZ2VvbV9saW5lKHNob3cubGVnZW5kID0gRkFMU0UpICsKICBzY2FsZV95X2NvbnRpbnVvdXMobGFiZWxzID0gc2NhbGVzOjpwZXJjZW50KSArCiAgZmFjZXRfd3JhcCh+Y291bnR5X25hbWUsIHNjYWxlcyA9ICJmcmVlIikgKwogIHRoZW1lX2J3KCkKICAKYGBgCgpXaGljaCBjb3VudHkncyBoYXZlIHRoZSBiaWdnZXN0IGNoYW5nZT8KYGBge3J9CmluY2FyICU+JSAKICBmaWx0ZXIoeWVhciA+PSAyMDAwKSAlPiUgCiAgbXV0YXRlKHByaXNvbl9wb3BfcGVyX2NhcGl0YSA9IHRvdGFsX3ByaXNvbl9wb3AgLyB0b3RhbF9wb3ApICU+JSAKICBncm91cF9ieShjb3VudHlfbmFtZSwgc3RhdGUpICU+JSAKICBzdW1tYXJpc2UodmFyaWFuY2UgPSB2YXIocHJpc29uX3BvcF9wZXJfY2FwaXRhLCBuYS5ybSA9IFRSVUUpLAogICAgICAgICAgICBzcHJlYWQgPSBtYXgocHJpc29uX3BvcF9wZXJfY2FwaXRhLCBuYS5ybSA9IFRSVUUpIC0gbWluKHByaXNvbl9wb3BfcGVyX2NhcGl0YSwgbmEucm0gPSBUUlVFKSkKYGBgCgo=